

****************************
* Summary stats and graphs *
****************************
		
************************ tables with summary stats
* a) outlet-level (Table A2): tabloid/broadsheet, quality index, national/regional,
*    and pre_switch_tw_foll
use "fb tw micro data w vars.dta", clear
* Only the Facebook rows are used to build the outlet-level table
* (presumably the outlet-level variables are the same on both platforms; TODO confirm).
keep if platform == "facebook"

* Reduce the data to one row per outlet, holding the outlet means of each variable.
collapse (mean) tabloid national pre_switch_tw_foll quality, by(outlet)

* Complementary indicators. They are left missing when the source indicator is
* missing, so that an outlet with unknown type/scope is not silently counted as
* "not broadsheet" / "not regional".
gen broadsheet = (tabloid == 0) if !missing(tabloid)
gen regional = (national == 0) if !missing(national)

eststo clear
quietly estpost summarize tabloid broadsheet quality national regional ///
 pre_switch_tw_foll
esttab, cells("mean(fmt(2)) sd(fmt(2)) min(fmt(2)) max(fmt(2)) count(fmt(0))") ///
 compress nonumbers noobs label modelwidth(12) ///
 title(Table A2: Summary statistics of outlet-level variables) varwidth(44) ///
 refcat(tabloid "Type" national "Scope") ///
 coeflabels(tabloid "-tabloid (binary)" ///
 broadsheet "-broadsheet (binary)" ///
 quality "Wellbrock (2011) quality index" ///
 national "-national (binary)" ///
 regional "-regional (binary)" ///
 pre_switch_tw_foll "Number of pre-switch Twitter followers")

 
* b) tweet/post level (Table A3): means and standard deviations by platform
use "fb tw micro data w vars.dta", clear
eststo clear

* Numeric prefixes make the by-group order (and hence the column order)
* Twitter first, Facebook second.
replace platform = "1_twitter" if platform == "twitter"
replace platform = "2_facebook" if platform == "facebook"

* Variables summarised in the table.
local msgvars likes shares start_all ///
 number_of_words mean_word_length questionmarks exclamationmarks ///
 share_neg_words share_pos_words

* Row labels. The list also holds labels for the ideology variables
* (congenial, uncongenial, mixed, unpolitical), which are not among the
* variables summarised here.
local rowlabels likes "-number of likes" ///
 shares "-number of retweets/shares" ///
 start_all "-starts with question word (binary)" ///
 number_of_words "-number of words" ///
 mean_word_length "-mean word length" ///
 questionmarks "-number of question marks" ///
 exclamationmarks "-number of exclamation marks" ///
 share_neg_words "-share of negative words" ///
 share_pos_words "-share of positive words" ///
 congenial "-like-minded (binary)" ///
 uncongenial "-counter-attitudinal (binary)" ///
 mixed "-ambiguous (binary)" ///
 unpolitical "-no ideological expressions (binary)"

* One stored set of summary statistics per platform.
bysort platform: eststo: quietly estpost summarize `msgvars'

esttab, cells("mean(fmt(3)) sd(fmt(3))") compress nonumbers label ///
 title(Table A3: Summary statistics of tweet/post-level variables) varwidth(44) ///
 refcat(likes "User engagement" start_all "Message characteristics" congenial "Ideology") ///
 coeflabels(`rowlabels') ///
 addnotes("Notes: The observations include all tweets and posts within a window of +/-30 days around Twitter's switch to algorithmic curation.")
 
 
 
 
 
* c) topic level (Table A4): percentage distribution of dominant_name
use "fb tw micro data w vars.dta", clear
* Drop the estimate sets stored for earlier tables; the tabulation posted next
* stays available to esttab as the active estimation result.
eststo clear
quietly estpost tabulate dominant_name, sort
* Row labels are taken from the category labels that estpost returns in e(labels).
esttab, cells("pct(fmt(2))") varlabels(`e(labels)') compress noobs nonotes
	
	
	
**************************** figures
* Figure 1: Number of likes over time
* Notes: The black vertical line denotes the introduction of Twitter’s content selection algorithm.
use "fb tw micro data w vars.dta", clear
* Drop one outlier. Stata orders missing values above every number, so the
* condition is restricted to non-missing likes; otherwise every observation
* with missing likes would be dropped along with the outlier.
drop if likes >= 164851 & !missing(likes)

* Daily mean of likes per platform (the dashed series in the plot).
sort date platform, stable
by date platform: egen mean_likes = mean(likes)

* Platform-level means over the 7 days before and the 7 days after the switch
* (the thin horizontal "Means" segments). Each is defined only inside its window.
sort platform, stable
by platform: egen global_likes_7bef = mean(likes) if rel_date >= -7 & rel_date <= -1
by platform: egen global_likes_7aft = mean(likes) if rel_date >= 1 & rel_date <= 7

* Panel a): Number of likes
* Facebook is drawn on the left axis (axis 1), Twitter on the right axis (axis 2).
* The x values are Stata daily dates: 20524 = 11mar2016, 20530 = 17mar2016
* (position of the vertical line), 20535 = 22mar2016.
twoway (line mean_likes date if platform == "facebook" & ///
        rel_date >= -7 & rel_date <= 7, lpattern(shortdash) lcolor(black) yaxis(1)) ///
       (line global_likes_7aft date if platform == "facebook" & ///
        rel_date >= 1 & rel_date <= 7, lpattern(solid) lwidth(thin) lcolor(gs4) yaxis(1)) ///
       (line global_likes_7bef date if platform == "facebook" & ///
        rel_date >= -7 & rel_date <= -1, lpattern(solid) lwidth(thin) lcolor(gs4) yaxis(1)) ///
       (line mean_likes date if platform == "twitter" & ///
        rel_date >= -7 & rel_date <= 7, lpattern(longdash) lcolor(black) yaxis(2)) ///
       (line global_likes_7aft date if platform == "twitter" & ///
        rel_date >= 1 & rel_date <= 7, lpattern(solid) lwidth(thin) lcolor(gs4) yaxis(2)) ///
       (line global_likes_7bef date if platform == "twitter" & ///
        rel_date >= -7 & rel_date <= -1, lpattern(solid) lwidth(thin) lcolor(gs4) yaxis(2)), ///
    scheme(s2mono) graphregion(color(white)) ///
    ytitle("Facebook likes" " ", size(medsmall) axis(1)) ///
    ytitle("Twitter likes" " ", size(medsmall) axis(2)) ///
    ylabel(0(100)400, axis(1) angle(0) gmax gmin) ///
    ylabel(0(2.5)10, axis(2) angle(0) gmax gmin) ///
    xtitle(" ") xline(20530, lpattern(solid) lwidth(medthin) lcolor(gs6)) ///
    xlabel(20524 "March 11" 20530 "March 17" ///
    20535 "March 22") ///
    legend(rows(1) order(1 "Facebook" 4 "Twitter" 2 "Means")) scale(1.15)
	
	
* Panel b: Number of retweets/shares
use "fb tw micro data w vars.dta", clear
* Drop one outlier (identified by its number of likes, as in the likes panel).
* Stata orders missing values above every number, so the condition is
* restricted to non-missing likes; otherwise observations with missing likes
* would be dropped as well and would no longer enter the share means below.
drop if likes >= 164851 & !missing(likes)

* Daily mean of retweets/shares per platform (the dashed series in the plot).
sort date platform, stable
by date platform: egen mean_shares = mean(shares)

* Platform-level means over the 7 days before and the 7 days after the switch
* (the thin horizontal "Means" segments). Each is defined only inside its window.
sort platform, stable
by platform: egen global_shares_7bef = mean(shares) if rel_date >= -7 & rel_date <= -1
by platform: egen global_shares_7aft = mean(shares) if rel_date >= 1 & rel_date <= 7

* Facebook is drawn on the left axis (axis 1), Twitter on the right axis (axis 2).
* The x values are Stata daily dates: 20524 = 11mar2016, 20530 = 17mar2016
* (position of the vertical line), 20535 = 22mar2016.
twoway (line mean_shares date if platform == "facebook" & ///
        rel_date >= -7 & rel_date <= 7, lpattern(shortdash) lcolor(black) yaxis(1)) ///
       (line global_shares_7aft date if platform == "facebook" & ///
        rel_date >= 1 & rel_date <= 7, lpattern(solid) lwidth(thin) lcolor(gs4) yaxis(1)) ///
       (line global_shares_7bef date if platform == "facebook" & ///
        rel_date >= -7 & rel_date <= -1, lpattern(solid) lwidth(thin) lcolor(gs4) yaxis(1)) ///
       (line mean_shares date if platform == "twitter" & ///
        rel_date >= -7 & rel_date <= 7, lpattern(longdash) lcolor(black) yaxis(2)) ///
       (line global_shares_7aft date if platform == "twitter" & ///
        rel_date >= 1 & rel_date <= 7, lpattern(solid) lwidth(thin) lcolor(gs4) yaxis(2)) ///
       (line global_shares_7bef date if platform == "twitter" & ///
        rel_date >= -7 & rel_date <= -1, lpattern(solid) lwidth(thin) lcolor(gs4) yaxis(2)), ///
    scheme(s2mono) graphregion(color(white)) ///
    ytitle("Facebook shares" " ", size(medsmall) axis(1)) ///
    ytitle("Twitter retweets" " ", size(medsmall) axis(2)) ///
    ylabel(0(25)100, axis(1) angle(0) gmax gmin) ///
    ylabel(0(2)8, axis(2) angle(0) gmax gmin) ///
    xtitle(" ") xline(20530, lpattern(solid) lwidth(medthin) lcolor(gs6)) ///
    xlabel(20524 "March 11" 20530 "March 17" ///
    20535 "March 22") ///
    legend(rows(1) order(1 "Facebook" 4 "Twitter" 2 "Means")) scale(1.15)


	
* Figure A6: Global development of the number of Twitter users, 2013 - 2017
* Notes: Data based on Twitter's annual reports to stakeholders. The black vertical line denotes the introduction of Twitter’s content selection algorithm.
import excel "intermediate files and code/monthly active users.xlsx", sheet("Twitter") firstrow clear

* Build a quarterly date from the year and quarter columns and declare it
* as the time variable.
generate tq = yq(year, quarter)
format %tq tq
tsset tq

* Quarterly dates count quarters since 1960q1, so 212 = 2013q1, 216 = 2014q1, ...,
* 228 = 2017q1; the vertical line at 224.8 falls inside 2016q1.
twoway line mau tq if tq > tq(2012q4), ///
    scheme(s2mono) graphregion(color(white)) scale(1.1) ///
    ytitle("Monthly active users" " ") ylabel(0(100)400, angle(0) gmin gmax) ///
    xtitle(" " "Time") ///
    xlabel(212 "2013" 216 "2014" 220 "2015" 224 "2016" 228 "2017") ///
    xline(224.8, lpattern(solid) lwidth(medthin) lcolor(gs6))
 

		
* Plot tweet characteristics before and after the switch
* Step 1: collapse the Twitter observations in the +/-7-day window to one row
* per period (after_march16) holding, for every characteristic, its mean,
* standard deviation and number of non-missing observations, and derive the
* bounds of the 95% confidence interval of the mean.
use "fb tw micro data w vars.dta", clear
keep if rel_date >= -7 & rel_date <= 7 & platform == "twitter"

local chars items start_all number_of_words mean_word_length questionmarks ///
 exclamationmarks share_neg_words share_pos_words

* Build the "newvar=var" lists for the (sd) and (count) statistics.
* The count is taken per variable, so each interval uses the number of
* observations that actually entered that variable's mean and sd; one shared
* count taken from items would misstate the standard error of any variable
* with a different number of missing values.
local sd_stats
local n_stats
foreach v of local chars {
    local sd_stats `sd_stats' sd_`v'=`v'
    local n_stats `n_stats' n_`v'=`v'
}

collapse (mean) `chars' (sd) `sd_stats' (count) `n_stats', by(after_march16)

* mean +/- t(0.975, n-1) * sd / sqrt(n)
foreach v of local chars {
    generate hi_`v' = `v' + invttail(n_`v' - 1, 0.025) * (sd_`v' / sqrt(n_`v'))
    generate lo_`v' = `v' - invttail(n_`v' - 1, 0.025) * (sd_`v' / sqrt(n_`v'))
}

* Figure A4: Characteristics of tweets before and after the introduction of algorithmic curation
* Notes: Observations include all tweets within a window of +/-7 days around the switch. The error bars represent the 95% confidence interval.
* Panels, in the order in which they are drawn:
*   a) Daily total number of tweets         (items)
*   b) Tweet starts with question word      (start_all, binary)
*   c) Number of words                      (number_of_words)
*   d) Word length (characters)             (mean_word_length)
*   e) Number of question marks             (questionmarks)
*   f) Number of exclamation marks          (exclamationmarks)
*   g) Share of negative words              (share_neg_words)
*   h) Share of positive words              (share_pos_words)
* Every panel is a bar chart of the before/after means with capped error bars
* between lo_<var> and hi_<var>; only the variable and the y-axis label format
* differ between panels.
local panels items start_all number_of_words mean_word_length questionmarks ///
 exclamationmarks share_neg_words share_pos_words
local yfmts %5.0f %5.3f %5.2f %5.1f %5.3f %5.3f %5.3f %5.3f
local npanels : word count `panels'

forvalues p = 1/`npanels' {
    local v : word `p' of `panels'
    local yfmt : word `p' of `yfmts'
    graph twoway (bar `v' after_march16,  barwidth(0.5)) ///
     (rcap hi_`v' lo_`v' after_march16), ///
     scheme(s2mono) legend(off) ytitle("Mean") xscale(range(-0.5 1.5)) ///
     xlabel(0 "Before" 1 "After") xtitle("") ylabel(, angle(0) format(`yfmt')) ///
     graphregion(color(white)) plotregion(margin(zero)) scale(2)
}

* Two-sample t tests (before vs. after) for every tweet characteristic; the
* differences in means and their standard errors are added to the figure manually.
use "fb tw micro data w vars.dta", clear
keep if rel_date >= -7 & rel_date <= 7 & platform == "twitter"

foreach v in items start_all number_of_words mean_word_length ///
 questionmarks exclamationmarks share_neg_words share_pos_words {
    ttest `v', by(after_march16)
}


* Figure A5: Time of publication of tweets, before and after the introduction of algorithmic curation
* Notes: Observations include all tweets within a window of +/-7 days around the switch.
use "fb tw micro data w vars.dta", clear
keep if platform == "twitter" & rel_date >= -7 & rel_date <= 7

* Frequency of tweets per period and hour, turned into the percentage
* within each period.
contract after_march16 hour if !missing(after_march16, hour)
egen _percent = pc(_freq), by(after_march16)

* One percentage variable per period (_percent0, _percent1), plus
* x positions shifted by 0.2 to either side so that the two bars of an hour
* sit next to each other.
separate _percent, by(after_march16)
gen hour0 = hour - 0.2
gen hour1 = hour + 0.2

twoway (bar _percent0 hour0, base(0) barw(0.4) bc(gs10)) ///
       (bar _percent1 hour1, barw(0.4) bc(gs6)), ///
 scheme(s2mono) graphregion(color(white)) ///
 ytitle(Percent) xtitle(" " "Hour of publication") ///
 xlabel(0(2)23) scale(1.2) ylabel(, angle(0)) ///
 legend(rows(1) order(1 "Before" 2 "After"))
	
	
	

	
	
	

		
***************
* Regressions *
***************		
		
* Text fragments that are combined into the notes printed below the regression tables.
global notes_nbreg "The table shows exponentiated coefficients (i.e., incidence rate ratios) of negative binomial regressions."
global notes_controls "All models include a constant, day fixed effects, outlet-platform fixed effects, and an outlet-platform specific trend polynomial of order 3 (output omitted)."
global notes_se "Standard errors (in parentheses) are clustered at the outlet-platform level and refer to the exponentiated coefficients."
global notes_windows "The column headers denote the dependent variable and estimation window."

* Interpretation of incidence rate ratios (IRR):
* https://www.stata.com/statalist/archive/2010-03/msg01483.html
* An IRR can be read as a factor or as a percentage change; e.g., an IRR of
* 1.185 implies an increase of 18.5%.

* Table 1 baseline
* Negative binomial GLMs (log link) of likes and shares on the
* twitter x after_march16 interaction, with date and outlet-platform
* indicators and the outlet_trend* terms; standard errors clustered by
* outlet_platform_id.
use "fb tw micro data w vars.dta", clear
eststo clear

* Columns 1-3: likes, columns 4-6: shares. Within each outcome the estimation
* window widens from +/-7 to +/-14 to +/-30 days.
foreach outcome in likes shares {
    foreach win in 7 14 30 {
        quietly eststo: glm `outcome' twitter##after_march16 i.date i.outlet_platform_id outlet_trend* ///
            if rel_date >= -`win' & rel_date <= `win', cluster(outlet_platform_id) link(log) family(nb)
    }
}

* Report exponentiated coefficients; only the two main effects and their
* interaction are shown.
esttab, eform se nonotes nodepvar noobs ///
    star(* 0.10 ** 0.05 *** 0.01) ///
    cells(b(fmt(3) star) se(par fmt(3))) ///
    scalars("N Observations") sfmt(0) ///
    title(Table 1: Effects of algorithmic curation on user engagement) ///
    mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
    varwidth(30) modelwidth(13) ///
    drop(0.twitter 0.after_march16 0.twitter#0.after_march16 0.twitter#1.after_march16 ///
    1.twitter#0.after_march16 *outlet_platform_id* *date* *outlet_trend* _cons) ///
    coeflabels(1.twitter "Twitter" ///
    1.after_march16 "After" ///
    1.twitter#1.after_march16 "Twitter x after") ///
    addnotes("Notes: $notes_nbreg $notes_windows $notes_controls $notes_se" ///
    "* p < 0.10, ** p < 0.05, *** p < 0.01")

		
		
* Table A5 robustness: matched tweets/posts
* using https://carloscinelli.shinyapps.io/robustness_value/ to compute "partial R2 of treatment"
* and "robustness value", assuming 72 clusters - 1 = 71 degrees of freedom
use "fb tw micro data w vars.dta", clear
global notes_match = "The sample used in Panel A includes posts and tweets with a cosine similarity larger than 0.8, whereas Panel B includes posts and tweets with a cosine similarity below 0.2. For comparison, Panel C shows the same results as from the baseline model in Table 1." 
global notes_controls_match = "All models include a constant, day fixed effects, and outlet-platform fixed effects (output omitted)."
global notes_sens = "Partial R2 of treatment indicates how much of the variation in the engagement metrics is explained by Twitter's switch, whereas Robustness value indicates how strongly unobserved confounders would have to be associated with both user engagement and the treatment – measured in terms of partial R2 – to drive the treatment effect to zero (see Cinelli and Hazlett, 2020)."

* In every panel, columns 1-3 are likes and columns 4-6 are shares, each for
* the +/-7, +/-14 and +/-30-day windows.

* Panel A: Tweets/posts with counterpart on other platform
* Stata orders missing values above every number, so "max_c > 0.8" alone would
* also select observations whose similarity score is missing; these are
* excluded explicitly so that the panel contains only messages with a
* cosine similarity above 0.8.
eststo clear
foreach outcome in likes shares {
    foreach win in 7 14 30 {
        quietly eststo: glm `outcome' twitter##after_march16 i.date i.outlet_platform_id ///
            if rel_date >= -`win' & rel_date <= `win' & max_c > 0.8 & !missing(max_c), ///
            cluster(outlet_platform_id) link(log) family(nb)
    }
}
esttab, eform star(* 0.10 ** 0.05 *** 0.01) se nonotes ///
    cells(b(fmt(3) star) se(par fmt(3))) nodepvar noobs ///
    scalars("N Observations") sfmt(0) ///
    mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
    title(Table A5: Effects of algorithmic curation on user engagement, using samples of matched and unmatched tweets/posts) ///
    varwidth(30) modelwidth(13) ///
    keep(1.twitter#1.after_march16) ///
    coeflabels(1.twitter#1.after_march16 "Twitter x after")

* Panel B: Tweets/posts without counterpart on other platform
* ("max_c < 0.2" is never true for a missing max_c, so no extra guard is needed.)
eststo clear
foreach outcome in likes shares {
    foreach win in 7 14 30 {
        quietly eststo: glm `outcome' twitter##after_march16 i.date i.outlet_platform_id ///
            if rel_date >= -`win' & rel_date <= `win' & max_c < 0.2, ///
            cluster(outlet_platform_id) link(log) family(nb)
    }
}
esttab, eform star(* 0.10 ** 0.05 *** 0.01) se nonotes ///
    cells(b(fmt(3) star) se(par fmt(3))) nodepvar noobs ///
    scalars("N Observations") sfmt(0) ///
    varwidth(30) modelwidth(13) nomtitles ///
    keep(1.twitter#1.after_march16) ///
    coeflabels(1.twitter#1.after_march16 "Twitter x after")

* Panel C: All tweets/posts
* Same specification as the baseline models, i.e. including the outlet_trend* terms.
eststo clear
foreach outcome in likes shares {
    foreach win in 7 14 30 {
        quietly eststo: glm `outcome' twitter##after_march16 i.date i.outlet_platform_id outlet_trend* ///
            if rel_date >= -`win' & rel_date <= `win', ///
            cluster(outlet_platform_id) link(log) family(nb)
    }
}
esttab, eform star(* 0.10 ** 0.05 *** 0.01) se nonotes ///
    cells(b(fmt(3) star) se(par fmt(3))) nodepvar noobs ///
    scalars("N Observations") sfmt(0) ///
    varwidth(30) modelwidth(13) nomtitles ///
    keep(1.twitter#1.after_march16) ///
    coeflabels(1.twitter#1.after_march16 "Twitter x after") ///
    addnotes("Notes: $notes_nbreg $notes_windows $notes_match $notes_controls_match $notes_sens $notes_se" ///
    "* p < 0.10, ** p < 0.05, *** p < 0.01")
		
		
		
		
* Table A6 robustness OLS log transformation
* Linear regressions of log_likes and log_shares on the same right-hand side
* as the baseline models, clustered by outlet_platform_id.
use "fb tw micro data w vars.dta", clear
global notes_se_ols = "Standard errors (in parentheses) are clustered at the outlet-platform level."
eststo clear

* Columns 1-3: log_likes, columns 4-6: log_shares; windows of +/-7, +/-14, +/-30 days.
foreach outcome in likes shares {
    foreach win in 7 14 30 {
        quietly eststo: reg log_`outcome' twitter##after_march16 i.date i.outlet_platform_id outlet_trend* ///
            if rel_date >= -`win' & rel_date <= `win', cluster(outlet_platform_id)
    }
}

esttab, se nonotes nodepvar noobs ///
    star(* 0.10 ** 0.05 *** 0.01) ///
    cells(b(fmt(3) star) se(par fmt(3))) ///
    scalars("r2_a Adjusted R2" "N Observations") sfmt(3 0) ///
    title(Table A6: Effects of algorithmic curation on user engagement (OLS estimates I)) ///
    mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
    varwidth(30) modelwidth(13) ///
    drop(0.twitter 0.after_march16 0.twitter#0.after_march16 0.twitter#1.after_march16 ///
    1.twitter#0.after_march16 *outlet_platform_id* *date* *outlet_trend* _cons) ///
    coeflabels(1.twitter "Twitter" ///
    1.after_march16 "After" ///
    1.twitter#1.after_march16 "Twitter x after") ///
    addnotes("Notes: $notes_windows $notes_controls $notes_se_ols" ///
    "* p < 0.10, ** p < 0.05, *** p < 0.01")
	
	

* Table A7 robust OLS inverse hyperbolic sine transformation
* Linear regressions of asinh_likes and asinh_shares on the same right-hand
* side as the baseline models, clustered by outlet_platform_id.
use "fb tw micro data w vars.dta", clear
eststo clear

* Columns 1-3: asinh_likes, columns 4-6: asinh_shares; windows of +/-7, +/-14, +/-30 days.
foreach outcome in likes shares {
    foreach win in 7 14 30 {
        quietly eststo: reg asinh_`outcome' twitter##after_march16 i.date i.outlet_platform_id outlet_trend* ///
            if rel_date >= -`win' & rel_date <= `win', cluster(outlet_platform_id)
    }
}

esttab, se nonotes nodepvar noobs ///
    star(* 0.10 ** 0.05 *** 0.01) ///
    cells(b(fmt(3) star) se(par fmt(3))) ///
    scalars("r2_a Adjusted R2" "N Observations") sfmt(3 0) ///
    title(Table A7: Effects of algorithmic curation on user engagement (OLS estimates II)) ///
    mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
    varwidth(30) modelwidth(13) ///
    drop(0.twitter 0.after_march16 0.twitter#0.after_march16 0.twitter#1.after_march16 ///
    1.twitter#0.after_march16 *outlet_platform_id* *date* *outlet_trend* _cons) ///
    coeflabels(1.twitter "Twitter" ///
    1.after_march16 "After" ///
    1.twitter#1.after_march16 "Twitter x after") ///
    addnotes("Notes: $notes_windows $notes_controls $notes_se_ols" ///
    "* p < 0.10, ** p < 0.05, *** p < 0.01")
		
		
		
		
		

		
* Table A8 placebo likes
* The baseline likes model is re-estimated around four fictional treatment
* dates, each with a +/-7-day window.
use "fb tw micro data placebo.dta", clear
global notes_placebo = "The column headers denote fictional treatment dates. All estimates refer to the +/- 7-day windows around these dates."
* add super column header: Fictional treatment date:
eststo clear

* For each placebo date: the post-period indicator and the matching
* relative-date variable (same position in both lists).
local after_vars after_mar16_2015 after_dec16_2015 after_jun16_2016 after_mar16_2017
local rel_vars rel_date2015_mar rel_date2015_dec rel_date2016_jun rel_date2017_mar

forvalues j = 1/4 {
    local a : word `j' of `after_vars'
    local r : word `j' of `rel_vars'
    * Copy the indicator into the common name "after" so that the coefficient
    * names are identical across the four models.
    if `j' > 1 drop after
    gen after = `a'
    quietly eststo: glm likes twitter##after i.date i.outlet_platform_id ///
        outlet_trend* if `r' >= -7 & `r' <= 7, cluster(outlet_platform_id) link(log) family(nb)
}

esttab, eform se nonotes nodepvar noobs ///
    star(* 0.10 ** 0.05 *** 0.01) ///
    cells(b(fmt(3) star) se(par fmt(3))) ///
    scalars("N Observations") sfmt(0) ///
    title(Table A8: Effects of algorithmic curation on likes (placebo regressions)) ///
    mtitles("12 months before switch" "3 months before switch" ///
    "3 months after switch" "12 months after switch") ///
    keep(1.twitter 1.after 1.twitter#1.after) ///
    coeflabels(1.twitter "Twitter" ///
    1.after "After" ///
    1.twitter#1.after "Twitter x after") ///
    addnotes("Notes: $notes_nbreg Dependent variable: number of likes. $notes_placebo $notes_controls $notes_se" ///
    "* p < 0.10, ** p < 0.05, *** p < 0.01")
		
		
* Table A9 placebo shares
* The baseline shares model is re-estimated around four fictional treatment
* dates, each with a +/-7-day window.
use "fb tw micro data placebo.dta", clear
global notes_placebo = "The column headers denote fictional treatment dates. All estimates refer to the +/- 7-day windows around these dates."
* add super column header: Fictional treatment date:
eststo clear

* For each placebo date: the post-period indicator and the matching
* relative-date variable (same position in both lists).
local after_vars after_mar16_2015 after_dec16_2015 after_jun16_2016 after_mar16_2017
local rel_vars rel_date2015_mar rel_date2015_dec rel_date2016_jun rel_date2017_mar

forvalues j = 1/4 {
    local a : word `j' of `after_vars'
    local r : word `j' of `rel_vars'
    * Copy the indicator into the common name "after" so that the coefficient
    * names are identical across the four models.
    if `j' > 1 drop after
    gen after = `a'
    quietly eststo: glm shares twitter##after i.date i.outlet_platform_id ///
        outlet_trend* if `r' >= -7 & `r' <= 7, cluster(outlet_platform_id) link(log) family(nb)
}

esttab, eform se nonotes nodepvar noobs ///
    star(* 0.10 ** 0.05 *** 0.01) ///
    cells(b(fmt(3) star) se(par fmt(3))) ///
    scalars("N Observations") sfmt(0) ///
    title(Table A9: Effects of algorithmic curation on retweets/shares (placebo regressions)) ///
    mtitles("12 months before switch" "3 months before switch" ///
    "3 months after switch" "12 months after switch") ///
    keep(1.twitter 1.after 1.twitter#1.after) ///
    coeflabels(1.twitter "Twitter" ///
    1.after "After" ///
    1.twitter#1.after "Twitter x after") ///
    addnotes("Notes: $notes_nbreg Dependent variable: number of retweets/shares. $notes_placebo $notes_controls $notes_se" ///
    "* p < 0.10, ** p < 0.05, *** p < 0.01")
	
	

* Table A10 robustness sum of likes / outlet-day level
use "fb tw micro data w vars.dta", clear

* Aggregate to one row per date and outlet_platform_id: likes and shares are
* summed, the remaining variables are carried along as group means
* (presumably they do not vary within an outlet-platform-day; TODO confirm).
collapse (sum) likes shares (mean) twitter after_march16 rel_date ///
 outlet_trend outlet_trend_sq outlet_trend_tri, by(date outlet_platform_id)

global notes_nbreg_obs = "The table shows exponentiated coefficients (i.e., incidence rate ratios) of negative binomial regressions, using data at the outlet-platform-day level."
global notes_dep_likes3 = "The dependent variable is the sum of likes or sum of retweets/shares per outlet over all tweets or posts on a given day."
eststo clear

* Negative binomial regressions with constant dispersion, clustered by
* outlet_platform_id. Columns 1-3: likes, columns 4-6: shares; windows of
* +/-7, +/-14, +/-30 days.
foreach outcome in likes shares {
    foreach win in 7 14 30 {
        quietly eststo: nbreg `outcome' twitter##after_march16 i.date i.outlet_platform_id outlet_trend* ///
            if rel_date >= -`win' & rel_date <= `win', cluster(outlet_platform_id) d(c)
    }
}
esttab, eform star(* 0.10 ** 0.05 *** 0.01) se nonotes ///
        cells(b(fmt(3) star) se(par fmt(3))) nodepvar noobs ///
		scalars("N Observations") sfmt(0) ///
		mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
		title(Table A10: Effects of algorithmic curation on daily total user engagement) ///
		varwidth(30) modelwidth(13) ///
		drop(0.twitter 0.after_march16 0.twitter#0.after_march16 0.twitter#1.after_march16 ///
		1.twitter#0.after_march16 *outlet_platform_id* *date* *outlet_trend* _cons) ///
		coeflabels(1.twitter "Twitter" ///
		1.after_march16 "After" ///
		1.twitter#1.after_march16 "Twitter x after") ///
		addnotes("Notes: $notes_nbreg_obs $notes_dep_likes3 $notes_controls $notes_se" ///
		"* p < 0.10, ** p < 0.05, *** p < 0.01")
 
 
 
	


*********************
* By type of outlet *
*********************
* Table 2 is printed as three stacked esttab panels. Every panel estimates the
* same triple-difference model (twitter x after_march16 x outlet characteristic)
* for likes (columns 1-3) and shares (columns 4-6) over the +/- 7, 14 and 30 day
* windows, and reports only the triple interaction.
global notes_nbreg = "The table shows exponentiated coefficients (i.e., incidence rate ratios) of negative binomial regressions."
global notes_se = "Standard errors (in parentheses) are clustered at the outlet-platform level."
global notes_windows = "The column headers denote the dependent variable and estimation window."
global notes_controls_interact = "All models include a constant, day fixed effects, outlet-platform fixed effects, an outlet-platform specific trend polynomial of order 3, and all constituent terms of the interactions (output omitted)."

use "fb tw micro data w vars.dta", clear

* Panel A: Journalistic approach (reference: broadsheet)
eststo clear
foreach dv in likes shares {
	foreach w in 7 14 30 {
		quietly eststo: glm `dv' twitter##after_march16##tabloid ///
		 i.date i.outlet_platform_id outlet_trend* ///
		 if rel_date >= -`w' & rel_date <= `w', ///
		 cluster(outlet_platform_id) link(log) family(nb)
	}
}
* NOTE(review): the title below says "by newspaper characteristics" while the
* Panel B title says "by type of outlet" -- confirm which wording is intended.
esttab, ///
	title(Table 2: Effects of algorithmic curation on user engagement, by newspaper characteristics) ///
	mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
	eform se nonotes nodepvar noobs ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	cells(b(fmt(3) star) se(par fmt(3))) ///
	scalars("N Observations") sfmt(0) ///
	varwidth(30) modelwidth(13) ///
	keep(1.twitter#1.after_march16#1.tabloid) ///
	coeflabels(1.twitter#1.after_march16#1.tabloid "Twitter x after x tabloid")

* Panel B: Wellbrock (2011) quality index (continuous)
eststo clear
foreach dv in likes shares {
	foreach w in 7 14 30 {
		quietly eststo: glm `dv' twitter##after_march16##c.quality ///
		 i.date i.outlet_platform_id outlet_trend* ///
		 if rel_date >= -`w' & rel_date <= `w', ///
		 cluster(outlet_platform_id) link(log) family(nb)
	}
}
esttab, ///
	title(Table 2: Effects of algorithmic curation on user engagement, by type of outlet) ///
	mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
	eform se nonotes nodepvar noobs ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	cells(b(fmt(3) star) se(par fmt(3))) ///
	scalars("N Observations") sfmt(0) ///
	varwidth(30) modelwidth(13) ///
	keep(1.twitter#1.after_march16#c.quality) ///
	coeflabels(1.twitter#1.after_march16#c.quality "Twitter x after x quality")

* Panel C: Scope (reference: regional)
* Last panel: no title or column headers, but it carries the table notes.
eststo clear
foreach dv in likes shares {
	foreach w in 7 14 30 {
		quietly eststo: glm `dv' twitter##after_march16##national ///
		 i.date i.outlet_platform_id outlet_trend* ///
		 if rel_date >= -`w' & rel_date <= `w', ///
		 cluster(outlet_platform_id) link(log) family(nb)
	}
}
esttab, ///
	eform se nonotes nodepvar noobs ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	cells(b(fmt(3) star) se(par fmt(3))) ///
	scalars("N Observations") sfmt(0) ///
	varwidth(30) modelwidth(13) ///
	keep(1.twitter#1.after_march16#1.national) ///
	coeflabels(1.twitter#1.after_march16#1.national "Twitter x after x national") ///
	addnotes("Notes: $notes_nbreg $notes_windows $notes_controls_interact $notes_se" ///
	         "* p < 0.10, ** p < 0.05, *** p < 0.01")
		
		
		

*******************************
* Outlet and topic popularity *
*******************************

* popular topic = over past 30 days; determined by number of Twitter likes
* Step 1: average number of likes per topic (dominant_name), Twitter rows only.
* NOTE(review): the collapse uses every Twitter observation in the file; if the
* file also covers post-switch days, "initial"/"pre-switch" topic popularity
* would need an explicit restriction here -- confirm against the data.
use "fb tw micro data w vars.dta", clear
keep if platform == "twitter"

collapse (mean) likes, by(dominant_name)
rename likes topic_likes

* NOTE(review): -table ..., contents()- is the pre-Stata-17 syntax; it needs
* version control (or -statistic()-) when run under Stata 17 or newer.
table dominant_name, contents(mean topic_likes)

* Keep the topic-level means in a Stata tempfile rather than a fixed file name
* in the working directory: nothing there can be overwritten, and the file is
* removed automatically even if the run stops before the merge.
* (The tempfile macro is local, so run this section in one go.)
tempfile topic_popularity
save "`topic_popularity'"

* Step 2: attach the topic means to the full micro data (both platforms).
use "fb tw micro data w vars.dta", clear

merge m:1 dominant_name using "`topic_popularity'"
drop _merge

* Step 3: quintile indicators (1 = lowest fifth), computed over all rows of the
* merged data.
xtile qt_pre_foll=pre_switch_tw_foll, nq(5)
xtile qt_topic_likes=topic_likes, nq(5)

************** tables
* Table A11 is printed as two stacked panels. Columns 1-3 use likes, columns
* 4-6 use shares, each over the +/- 7, 14 and 30 day windows. Only the triple
* interactions with quintiles 2-5 are displayed.

* Panel A: Pre-switch number of followers (reference category: 1st quintile)
eststo clear
foreach dv in likes shares {
	foreach w in 7 14 30 {
		quietly eststo: glm `dv' twitter##after_march16##qt_pre_foll ///
		 i.date i.outlet_platform_id outlet_trend* ///
		 if rel_date >= -`w' & rel_date <= `w', ///
		 cluster(outlet_platform_id) link(log) family(nb)
	}
}
esttab, ///
	title(Table A11: Effects of algorithmic curation on user engagement, by outlet and content popularity) ///
	mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
	eform se nonotes nodepvar noobs ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	cells(b(fmt(3) star) se(par fmt(3))) ///
	scalars("N Observations") sfmt(0) ///
	varwidth(30) modelwidth(13) ///
	keep(1.twitter#1.after_march16#2* 1.twitter#1.after_march16#3* ///
	     1.twitter#1.after_march16#4* 1.twitter#1.after_march16#5*) ///
	coeflabels(1.twitter#1.after_march16#2.qt_pre_foll "2nd quintile" ///
	           1.twitter#1.after_march16#3.qt_pre_foll "3rd quintile" ///
	           1.twitter#1.after_march16#4.qt_pre_foll "4th quintile" ///
	           1.twitter#1.after_march16#5.qt_pre_foll "5th quintile")

* Panel B: Initial topic popularity (reference category: 1st quintile)
* Closing panel: no title or column headers, but it carries the table notes.
eststo clear
foreach dv in likes shares {
	foreach w in 7 14 30 {
		quietly eststo: glm `dv' twitter##after_march16##qt_topic_likes ///
		 i.date i.outlet_platform_id outlet_trend* ///
		 if rel_date >= -`w' & rel_date <= `w', ///
		 cluster(outlet_platform_id) link(log) family(nb)
	}
}
esttab, ///
	eform se nonotes nodepvar noobs ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	cells(b(fmt(3) star) se(par fmt(3))) ///
	scalars("N Observations") sfmt(0) ///
	varwidth(30) modelwidth(13) ///
	keep(1.twitter#1.after_march16#2* 1.twitter#1.after_march16#3* ///
	     1.twitter#1.after_march16#4* 1.twitter#1.after_march16#5*) ///
	coeflabels(1.twitter#1.after_march16#2.qt_topic_likes "2nd quintile" ///
	           1.twitter#1.after_march16#3.qt_topic_likes "3rd quintile" ///
	           1.twitter#1.after_march16#4.qt_topic_likes "4th quintile" ///
	           1.twitter#1.after_march16#5.qt_topic_likes "5th quintile") ///
	addnotes("Notes: $notes_nbreg $notes_windows $notes_controls_interact $notes_se" ///
	         "* p < 0.10, ** p < 0.05, *** p < 0.01")

	
* Figure 2: Effects of algorithmic curation on user engagement, by pre-switch outlet popularity
* Each panel overlays the triple-interaction estimates (twitter x after x
* follower quintile) from the +/- 7, 14 and 30 day windows, stored as a, b, c.
* `zero` is an all-zero regressor: it is dropped from the model as collinear
* and, through coefplot's -omitted- option, still plotted (exp(0) = 1). It
* appears to serve as the placeholder for the 1st (reference) quintile at x = 1.
gen zero = 0
* A: Number of likes
quietly glm likes zero twitter##after_march16##qt_pre_foll i.date i.outlet_platform_id ///
 outlet_trend* if rel_date >= -7 & rel_date <= 7, cluster(outlet_platform_id) link(log) family(nb)
est sto a
quietly glm likes zero twitter##after_march16##qt_pre_foll i.date i.outlet_platform_id ///
 outlet_trend* if rel_date >= -14 & rel_date <= 14, cluster(outlet_platform_id) link(log) family(nb)
est sto b
quietly glm likes zero twitter##after_march16##qt_pre_foll i.date i.outlet_platform_id ///
 outlet_trend* if rel_date >= -30 & rel_date <= 30, cluster(outlet_platform_id) link(log) family(nb)
est sto c
* No recast() here: coefplot cannot recast the markers to "box", and the other
* panels of Figures 2 and 3 use the default marker plot with identical options.
coefplot (a, msymbol(O)) (b, msymbol(S)) (c, msymbol(T)), eform vert ///
 keep(zero 1.twitter#1.after_march16#*) scheme(s2mono) msize(small) ciopt(lcolor(gs8)) ///
 levels(95) ylabel(0(1)4, gmin gmax angle(0)) mcolor(black) graphregion(color(white)) ///
 yline(1, lpattern(dash) lcolor(gs10)) omitted scale(1.3) aspect(0.7) ///
 ytitle("Incidence rate ratio") xtitle(" " "Followers (quintiles)") ///
 xlabel(1 "1st" 2 "2nd" 3 "3rd" 4 "4th" 5 "5th", angle(45)) ///
 legend(label(2 "+/- 7 days") label(4 "+/- 14 days") label(6 "+/- 30 days") rows(1) pos(12) si(small))
* B: Number of retweets/shares
* Same models with shares as outcome; the stored results a, b, c are overwritten.
quietly glm shares zero twitter##after_march16##qt_pre_foll i.date i.outlet_platform_id ///
 outlet_trend* if rel_date >= -7 & rel_date <= 7, cluster(outlet_platform_id) link(log) family(nb)
est sto a
quietly glm shares zero twitter##after_march16##qt_pre_foll i.date i.outlet_platform_id ///
 outlet_trend* if rel_date >= -14 & rel_date <= 14, cluster(outlet_platform_id) link(log) family(nb)
est sto b
quietly glm shares zero twitter##after_march16##qt_pre_foll i.date i.outlet_platform_id ///
 outlet_trend* if rel_date >= -30 & rel_date <= 30, cluster(outlet_platform_id) link(log) family(nb)
est sto c
coefplot (a, msymbol(O)) (b, msymbol(S)) (c, msymbol(T)), eform vert ///
 keep(zero 1.twitter#1.after_march16#*) scheme(s2mono) msize(small) ciopt(lcolor(gs8)) ///
 levels(95) ylabel(0(1)4, gmin gmax angle(0)) mcolor(black) graphregion(color(white)) ///
 yline(1, lpattern(dash) lcolor(gs10)) omitted scale(1.3) aspect(0.7) ///
 ytitle("Incidence rate ratio") xtitle(" " "Followers (quintiles)") ///
 xlabel(1 "1st" 2 "2nd" 3 "3rd" 4 "4th" 5 "5th", angle(45)) ///
 legend(label(2 "+/- 7 days") label(4 "+/- 14 days") label(6 "+/- 30 days") rows(1) pos(12) si(small))
 

* Figure 3: Effects of algorithmic curation on user engagement, by pre-switch topic popularity
* Both panels overlay the triple-interaction estimates (twitter x after x
* topic-likes quintile) for the three estimation windows. The regressor `zero`
* must already exist in memory; it is created in the Figure 2 section.
* k-th stored name belongs to the k-th window: a = 7, b = 14, c = 30 days
local fig3_slots a b c
local fig3_windows 7 14 30

* A: Number of likes
forvalues k = 1/3 {
	local slot : word `k' of `fig3_slots'
	local w    : word `k' of `fig3_windows'
	quietly glm likes zero twitter##after_march16##qt_topic_likes ///
	 i.date i.outlet_platform_id outlet_trend* ///
	 if rel_date >= -`w' & rel_date <= `w', ///
	 cluster(outlet_platform_id) link(log) family(nb)
	est sto `slot'
}
coefplot (a, msymbol(O)) (b, msymbol(S)) (c, msymbol(T)), ///
 eform vert omitted levels(95) ///
 keep(zero 1.twitter#1.after_march16#*) ///
 scheme(s2mono) graphregion(color(white)) scale(1.3) aspect(0.7) ///
 msize(small) mcolor(black) ciopt(lcolor(gs8)) ///
 yline(1, lpattern(dash) lcolor(gs10)) ///
 ylabel(0(1)4, gmin gmax angle(0)) ///
 xlabel(1 "1st" 2 "2nd" 3 "3rd" 4 "4th" 5 "5th", angle(45)) ///
 ytitle("Incidence rate ratio") xtitle(" " "Topic likes (quintiles)") ///
 legend(label(2 "+/- 7 days") label(4 "+/- 14 days") label(6 "+/- 30 days") rows(1) pos(12) si(small))

* B: Number of retweets/shares
* Same models with shares as outcome; the stored results a, b, c are overwritten.
forvalues k = 1/3 {
	local slot : word `k' of `fig3_slots'
	local w    : word `k' of `fig3_windows'
	quietly glm shares zero twitter##after_march16##qt_topic_likes ///
	 i.date i.outlet_platform_id outlet_trend* ///
	 if rel_date >= -`w' & rel_date <= `w', ///
	 cluster(outlet_platform_id) link(log) family(nb)
	est sto `slot'
}
coefplot (a, msymbol(O)) (b, msymbol(S)) (c, msymbol(T)), ///
 eform vert omitted levels(95) ///
 keep(zero 1.twitter#1.after_march16#*) ///
 scheme(s2mono) graphregion(color(white)) scale(1.3) aspect(0.7) ///
 msize(small) mcolor(black) ciopt(lcolor(gs8)) ///
 yline(1, lpattern(dash) lcolor(gs10)) ///
 ylabel(0(1)4, gmin gmax angle(0)) ///
 xlabel(1 "1st" 2 "2nd" 3 "3rd" 4 "4th" 5 "5th", angle(45)) ///
 ytitle("Incidence rate ratio") xtitle(" " "Topic likes (quintiles)") ///
 legend(label(2 "+/- 7 days") label(4 "+/- 14 days") label(6 "+/- 30 days") rows(1) pos(12) si(small))
		
	
		

*Table 3: Effects of algorithmic curation on user engagement, by text characteristics
* One model per outcome and window; each includes the full triple interaction
* of twitter and after_march16 with all seven text characteristics at once.
use "fb tw micro data w vars.dta", clear
global notes_nbreg = "The table shows exponentiated coefficients (i.e., incidence rate ratios) of negative binomial regressions."
global notes_se = "Standard errors (in parentheses) are clustered at the outlet-platform level."
global notes_windows = "The column headers denote the dependent variable and estimation window."
global notes_controls_interact = "All models include a constant, day fixed effects, outlet-platform fixed effects, an outlet-platform specific trend polynomial of order 3, and all constituent terms of the interactions (output omitted)."

* Text characteristics: start_all enters as a factor variable, the rest as
* continuous (c.) variables. The right-hand side is assembled once, in this order.
local text_traits start_all c.number_of_words c.mean_word_length c.questionmarks ///
 c.exclamationmarks c.share_neg_words c.share_pos_words
local triple_terms
foreach trait of local text_traits {
	local triple_terms `triple_terms' twitter##after_march16##`trait'
}

* Columns 1-3: likes, columns 4-6: shares, over the +/- 7, 14 and 30 day windows
eststo clear
foreach dv in likes shares {
	foreach w in 7 14 30 {
		quietly eststo: glm `dv' `triple_terms' ///
		 i.date i.outlet_platform_id outlet_trend* ///
		 if rel_date >= -`w' & rel_date <= `w', ///
		 cluster(outlet_platform_id) link(log) family(nb)
	}
}

* Only the seven triple-interaction coefficients are shown.
esttab, ///
	title(Table 3: Effects of algorithmic curation on user engagement, by text characteristics) ///
	mtitles("+/- 7 days" "+/- 14 days" "+/- 30 days" "+/- 7 days" "+/- 14 days" "+/- 30 days") ///
	eform se nonotes nodepvar noobs ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	cells(b(fmt(3) star) se(par fmt(3))) ///
	scalars("N Observations") sfmt(0) ///
	varwidth(45) modelwidth(12) ///
	keep(1.twitter#1.after_march16#1.start_all ///
	     1.twitter#1.after_march16#c.number_of_words ///
	     1.twitter#1.after_march16#c.mean_word_length ///
	     1.twitter#1.after_march16#c.questionmarks ///
	     1.twitter#1.after_march16#c.exclamationmarks ///
	     1.twitter#1.after_march16#c.share_neg_words ///
	     1.twitter#1.after_march16#c.share_pos_words) ///
	coeflabels(1.twitter#1.after_march16#1.start_all "Twitter x after x starts with question word" ///
	           1.twitter#1.after_march16#c.number_of_words "Twitter x after x number of words" ///
	           1.twitter#1.after_march16#c.mean_word_length "Twitter x after x mean word length" ///
	           1.twitter#1.after_march16#c.questionmarks "Twitter x after x number question marks" ///
	           1.twitter#1.after_march16#c.exclamationmarks "Twitter x after x number exclamation marks" ///
	           1.twitter#1.after_march16#c.share_neg_words "Twitter x after x share negative words" ///
	           1.twitter#1.after_march16#c.share_pos_words "Twitter x after x share positive words") ///
	addnotes("Notes: $notes_nbreg $notes_windows $notes_controls_interact $notes_se" ///
	         "* p < 0.10, ** p < 0.05, *** p < 0.01")
	

	
	
	
	
	
***********************
* parallel pre-trends *
***********************

* Figure A3: Estimated coefficients on interactions between the Twitter indicator and time dummies 
* Sample: 04 Mar 2016 - 31 Mar 2016, i.e. 28 days. date_dummy1-date_dummy28
* are plain day dummies; the _IdatXtwi_<date>_1 variables are day x twitter
* interactions named after the numeric Stata date (20517 = 04 Mar 2016).
use "fb tw micro data w vars.dta", clear
keep if date >= td(04.03.2016) & date <= td(31.03.2016)

quietly tabulate date, generate(date_dummy)
xi I.date*I.twitter, prefix(_I)
* xi leaves out the first day as base category, so its interaction is built by
* hand. It must equal 1 only for Twitter rows on 04 Mar 2016, like the
* xi-generated interactions; flagging every row of that day would just
* duplicate date_dummy1 and the term would be dropped as collinear.
gen _IdatXtwi_20517_1 = 0
replace _IdatXtwi_20517_1 = 1 if date == td(04.03.2016) & twitter == 1
* the variable ranges used in the models below rely on this position
order _IdatXtwi_20517_1, before(_IdatXtwi_20518_1)
* all-zero placeholder that takes the slot of the left-out day (13th day,
* labelled "Mar 16" on the x axis) so that the plot shows a point at 1 there
gen zero = 0

* The 13th day dummy and the 20529 interaction are left out of the model.
quietly gnbreg likes date_dummy1-date_dummy12 date_dummy14-date_dummy28 ///
 twitter _IdatXtwi_20517_1 - _IdatXtwi_20528_1 zero _IdatXtwi_20530_1 - _IdatXtwi_20544_1 ///
 i.outlet_platform_id, cluster(outlet_platform_id) irr
est sto w
* Panel A: Number of likes
coefplot w, eform vert keep(zero* *_IdatXtwi*) scheme(s2mono) msymbol(O) msize(small) ciopt(lcolor(gs8)) ///
 levels(95) ylabel(-0(0.5)3, angle(0) gmin gmax) mcolor(black) legend(off) graphregion(color(white)) ///
 yline(1) recast(connected) omitted ///
 ytitle("Coefficient value" " ") xline(13.5, lpattern(dash) lcolor(gs10) lwidth(medthick)) ///
 xlabel(1 "Mar 04" 3 "Mar 06" 5 "Mar 08" 7 "Mar 10" 9 "Mar 12" 11 "Mar 14" 13 "Mar 16" ///
 15 "Mar 18" 17 "Mar 20" 19 "Mar 22" 21 "Mar 24" 23 "Mar 26" 25 "Mar 28" 27 "Mar 30", angle(45))
* Notes: The graph shows the exponentiated coefficients from negative binomial regressions of the number of likes and the number of retweets/shares, respectively, on interactions between the Twitter dummy and time dummies, conditional on outlet-platform fixed effects. The grey dashed line marks the start of algorithmic curation after March 16, 2016. The grey solid spikes denote the 95% confidence interval, based on standard errors clustered at the outlet-platform-level.
 
* Joint test of the 12 interactions for 04-15 Mar (before the left-out day)
test _IdatXtwi_20517_1 _IdatXtwi_20518_1 _IdatXtwi_20519_1 _IdatXtwi_20520_1 _IdatXtwi_20521_1 ///
_IdatXtwi_20522_1 _IdatXtwi_20523_1 _IdatXtwi_20524_1 _IdatXtwi_20525_1 _IdatXtwi_20526_1 ///
_IdatXtwi_20527_1 _IdatXtwi_20528_1

* Joint test of the 15 interactions for 17-31 Mar (after the left-out day)
test _IdatXtwi_20530_1 _IdatXtwi_20531_1 _IdatXtwi_20532_1 _IdatXtwi_20533_1 _IdatXtwi_20534_1 ///
 _IdatXtwi_20535_1 _IdatXtwi_20536_1 _IdatXtwi_20537_1 _IdatXtwi_20538_1 _IdatXtwi_20539_1 ///
 _IdatXtwi_20540_1 _IdatXtwi_20541_1 _IdatXtwi_20542_1 _IdatXtwi_20543_1 _IdatXtwi_20544_1

 
 
* Same specification with shares as outcome; stored result w is overwritten.
quietly gnbreg shares date_dummy1-date_dummy12 date_dummy14-date_dummy28 ///
 twitter _IdatXtwi_20517_1 - _IdatXtwi_20528_1 zero _IdatXtwi_20530_1 - _IdatXtwi_20544_1 ///
 i.outlet_platform_id, cluster(outlet_platform_id) irr
est sto w
* Panel B: Number of retweets/shares
coefplot w, eform vert keep(zero* *_IdatXtwi*) scheme(s2mono) msymbol(O) msize(small) ciopt(lcolor(gs8)) ///
 levels(95) ylabel(-0(0.5)3, angle(0) gmin gmax) mcolor(black) legend(off) graphregion(color(white)) ///
 yline(1) recast(connected) omitted ///
 ytitle("Coefficient value" " ") xline(13.5, lpattern(dash) lcolor(gs10) lwidth(medthick)) ///
 xlabel(1 "Mar 04" 3 "Mar 06" 5 "Mar 08" 7 "Mar 10" 9 "Mar 12" 11 "Mar 14" 13 "Mar 16" ///
 15 "Mar 18" 17 "Mar 20" 19 "Mar 22" 21 "Mar 24" 23 "Mar 26" 25 "Mar 28" 27 "Mar 30", angle(45))

* Joint test of the 12 interactions for 04-15 Mar (before the left-out day)
test _IdatXtwi_20517_1 _IdatXtwi_20518_1 _IdatXtwi_20519_1 _IdatXtwi_20520_1 _IdatXtwi_20521_1 ///
_IdatXtwi_20522_1 _IdatXtwi_20523_1 _IdatXtwi_20524_1 _IdatXtwi_20525_1 _IdatXtwi_20526_1 ///
_IdatXtwi_20527_1 _IdatXtwi_20528_1

* Joint test of the 15 interactions for 17-31 Mar (after the left-out day)
test _IdatXtwi_20530_1 _IdatXtwi_20531_1 _IdatXtwi_20532_1 _IdatXtwi_20533_1 _IdatXtwi_20534_1 ///
 _IdatXtwi_20535_1 _IdatXtwi_20536_1 _IdatXtwi_20537_1 _IdatXtwi_20538_1 _IdatXtwi_20539_1 ///
 _IdatXtwi_20540_1 _IdatXtwi_20541_1 _IdatXtwi_20542_1 _IdatXtwi_20543_1 _IdatXtwi_20544_1

 
	
	
				
		
	
	